#################################################################################### 
####################################################################################
####################################################################################
# Appendix J: Tables 31--32
####################################################################################
####################################################################################
library(here)
library(kableExtra)
library(lmtest)
library(multiwayvcov)
####################################################################################
####################################################################################
## Read in survey data (from: analysis_villages.R)
dat = read.csv(here("data/master_called_villages.csv"))

####################################################################################
## Updated treatment status

# Create treat column defining actual status (not assignment)
dat$treat = 0 
dat[!is.na(dat$chp) & dat$chp == 1 ,]$treat = 1

# Treated
vils = c("KAKOOLA", "BWANYA", "KAITABAWALA", "BUMWERU", "SENKULU", "AMVUA")
dat[dat$villagename %in% vils,]$treat = 1

# Stopped treatment
# dat[dat$villagename == "NANNYINZI",]$treat = 0
# dat[dat$villagename == "IBAARE",]$treat = 0
rm(vils)

####################################################################################
## Cleaning Data

## NGO Presence
# Replace None/na with actual NA for NGO name columns
dat[, c("ngo1", "ngo2", "ngo3")] = apply(dat[, c("ngo1", "ngo2", "ngo3")], 2, function(x){ ifelse(!is.na(x) & x %in% c("None", "na"), NA, x) } )

# Move names recorded in binary NGOs column to NGO name column 
dat[is.na(dat$ngos) | dat$ngos == "None",]$ngos = "0"
dat[!dat$ngos %in%  c("0", "1") & !is.na(dat$ngo1) & is.na(dat$ngo2),]$ngo2 = dat[!dat$ngos %in%  c("0", "1") & !is.na(dat$ngo1) & is.na(dat$ngo2),]$ngos
dat[!dat$ngos %in%  c("0", "1") & is.na(dat$ngo1),]$ngo1 = dat[!dat$ngos %in%  c("0", "1") & is.na(dat$ngo1),]$ngos
#dat[!dat$ngos %in%  c("0", "1") , c("ngos","ngo1", "ngo2", "ngo3")]
# Replace names recorded in binary NGOs column with binary indicator
dat[!dat$ngos == "0",]$ngos = "1"
dat$ngos = as.numeric(dat$ngos)

# Count NGOs by sector
#dat[!is.na(dat$ngo1) & dat$ngo1 == "Living goods",]$ngo1 = "Living goods/1"
dat[!is.na(dat$ngo1) & dat$ngo1 == "Living goods",]$ngo1 = NA
dat[!is.na(dat$ngo1) & dat$ngo1 == "Hear Uganda",]$ngo1 = "Hear Uganda/3"
dat[!is.na(dat$ngo1) & dat$ngo1 == "PAPE-1",]$ngo1 = "PAPE/1"
#dat[!is.na(dat$ngo2) & dat$ngo2 == "BRAC Uganda",]$ngo2 = "BRAC Uganda/1"
dat[!is.na(dat$ngo2) & dat$ngo2 == "BRAC Uganda",]$ngo2 = NA

# Count NGOs
dat$ngo_count = 0
dat$ngo_count = apply(dat[, c("ngo1", "ngo2", "ngo3")], 1, function(x) {sum(!is.na(x))})
# Count health NGOs
dat$ngo_health = 0
dat$ngo_health = apply(dat[, c("ngo1", "ngo2", "ngo3")], 1, function(x) {sum(!is.na(x) & grepl("/1$", x, perl=TRUE))})
# Count education NGOs
dat$ngo_education = 0
dat$ngo_education = apply(dat[, c("ngo1", "ngo2", "ngo3")], 1, function(x) {sum(!is.na(x) & grepl("/2$", x, perl=TRUE))})
# Count other NGOOs
dat$ngo_other = 0
dat$ngo_other = apply(dat[, c("ngo1", "ngo2", "ngo3")], 1, function(x) {sum(!is.na(x) & grepl("/3$", x, perl=TRUE))})

## Health Facility Access
# Replace None/na with actual NA for HF name columns
dat[, c("name_med1", "name_med2", "name_med3", "name_med4")] = apply(dat[, c("name_med1", "name_med2", "name_med3", "name_med4")], 2, function(x){ ifelse(!is.na(x) & x %in% c("None", "na"), NA, x) } )

# Count Health Centers
dat$hf_count = 0
dat$hf_count = apply(dat[, c("name_med1", "name_med2", "name_med3", "name_med4")], 1, function(x) {sum(!is.na(x))})
# Government
dat$hf_govt = 0
dat$hf_govt = apply(dat[, c("name_med1", "name_med2", "name_med3", "name_med4")], 1, function(x) {sum(!is.na(x) & grepl("/1$", x, perl=TRUE))})
# Church
dat$hf_church = 0
dat$hf_church = apply(dat[, c("name_med1", "name_med2", "name_med3", "name_med4")], 1, function(x) {sum(!is.na(x) & grepl("/2$", x, perl=TRUE))})
# NGO
dat$hf_ngo = 0
dat$hf_ngo = apply(dat[, c("name_med1", "name_med2", "name_med3", "name_med4")], 1, function(x) {sum(!is.na(x) & grepl("/3$", x, perl=TRUE))})
# Private
dat$hf_pfp = 0
dat$hf_pfp = apply(dat[, c("name_med1", "name_med2", "name_med3", "name_med4")], 1, function(x) {sum(!is.na(x) & grepl("/4$", x, perl=TRUE))})

## School Access
# Replace None/na with actual NA for School name columns
dat[, c("name_sch1", "name_sch2", "name_sch3", "name_sch4")] = apply(dat[, c("name_sch1", "name_sch2", "name_sch3", "name_sch4")], 2, function(x){ ifelse(!is.na(x) & x %in% c("None", "na"), NA, x) } )
# Count Schools
dat$s_count = 0
dat$s_count = apply(dat[, c("name_sch1", "name_sch2", "name_sch3", "name_sch4")], 1, function(x) {sum(!is.na(x))})
# Government
dat$s_govt = 0
dat$s_govt = apply(dat[, c("name_sch1", "name_sch2", "name_sch3", "name_sch4")], 1, function(x) {sum(!is.na(x) & grepl("/1$", x, perl=TRUE))})
# Church
dat$s_church = 0
dat$s_church = apply(dat[, c("name_sch1", "name_sch2", "name_sch3", "name_sch4")], 1, function(x) {sum(!is.na(x) & grepl("/2$", x, perl=TRUE))})
# NGO
dat$s_ngo = 0
dat$s_ngo = apply(dat[, c("name_sch1", "name_sch2", "name_sch3", "name_sch4")], 1, function(x) {sum(!is.na(x) & grepl("/3$", x, perl=TRUE))})
# Private
dat$s_pfp = 0
dat$s_pfp = apply(dat[, c("name_sch1", "name_sch2", "name_sch3", "name_sch4")], 1, function(x) {sum(!is.na(x) & grepl("/4$", x, perl=TRUE))})

## Public Goods
dat$satisfaction_vht = as.numeric(dat$satisfaction_vht)
# Electricity
dat[!is.na(dat$electricity) & dat$electricity == 0,]$year_grid = NA
dat$year_grid = as.numeric(dat$year_grid)
dat$grid_access_years = 2019 - dat$year_grid
dat[!is.na(dat$electricity) & dat$electricity == 0 & is.na(dat$grid_access_years),]$grid_access_years = 0

# Water
dat[!is.na(dat$piped_h2o) & dat$piped_h2o == 0,]$year_h2o = NA
dat$year_h2o = as.numeric(dat$year_h2o)
dat$h2o_access_years = 2019 - dat$year_h2o
dat[!is.na(dat$piped_h2o) & dat$piped_h2o == 0 & is.na(dat$h2o_access_years),]$h2o_access_years = 0
dat$main_h2o_source = NA
dat[!is.na(dat$main_h2o) & dat$main_h2o == "1. piped water",]$main_h2o_source = 4
dat[!is.na(dat$main_h2o) & dat$main_h2o == "2. borehole",]$main_h2o_source = 3
dat[!is.na(dat$main_h2o) & dat$main_h2o == "3. protected spring",]$main_h2o_source = 2
dat[!is.na(dat$main_h2o) & dat$main_h2o == "4. shallow well",]$main_h2o_source = 1

# Sewage
dat$sewa = as.numeric(dat$sewa)
dat[!is.na(dat$sewa) & dat$sewa == 0,]$year_sewa = NA
dat$year_sewa = as.numeric(dat$year_sewa)
dat$sewa_access_years = 2019 - dat$year_sewa
dat[!is.na(dat$sewa) & dat$sewa == 0 & is.na(dat$sewa_access_years),]$sewa_access_years = 0

# Roads Upgrades
dat[, c("road_upgr1", "raod_upgr2", "road_upgr3")] = apply(dat[, c("road_upgr1", "raod_upgr2", "road_upgr3")], 2, function(x) {as.numeric(x)})
dat$road_upgrades = 0
dat$road_upgrades = apply(dat[, c("road_upgr1", "raod_upgr2", "road_upgr3")], 1, function(x) {sum(!is.na(x))})
dat$road_upgrades_year = apply(dat[, c("road_upgr1", "raod_upgr2", "road_upgr3")], 1, max, na.rm = T)
dat[is.infinite(dat$road_upgrades_year) ,]$road_upgrades_year = NA

####################################################################################
## Post PG Access Full > 2011

## Grid Access
# Post-treatment
dat$grid_post = 0
dat[!is.na(dat$year_grid) & dat$year_grid > 2010,]$grid_post = 1
# Pre-treatment
dat$grid_pre = 0
dat[!is.na(dat$year_grid) & dat$year_grid <= 2010,]$grid_pre = 1

# Post-study (control)
dat$grid_post_c = 0
dat[!is.na(dat$year_grid) & dat$year_grid > 2013,]$grid_post_c = 1
# Pre-study (control)
dat$grid_pre_c = 0
dat[!is.na(dat$year_grid) & dat$year_grid <= 2013,]$grid_pre_c = 1

## Piped Water
# Post-treatment
dat$water_post = 0
dat[!is.na(dat$year_h2o) & dat$year_h2o > 2010,]$water_post = 1
# Pre-treatment
dat$water_pre = 0
dat[!is.na(dat$year_h2o) & dat$year_h2o <= 2010,]$water_pre = 1

# Post-study (control)
dat$water_post_c = 0
dat[!is.na(dat$year_h2o) & dat$year_h2o > 2013,]$water_post_c = 1
# Pre-study (control)
dat$water_pre_c = 0
dat[!is.na(dat$year_h2o) & dat$year_h2o <= 2013,]$water_pre_c = 1

## Sewage
# Post-treatment
dat$sewa_post = 0
dat[!is.na(dat$year_sewa) & dat$year_sewa > 2010,]$sewa_post = 1
# Pre-treatment
dat$sewa_pre = 0
dat[!is.na(dat$year_sewa) & dat$year_sewa <= 2010,]$sewa_pre = 1

# Post-study (control)
dat$sewa_post_c = 0
dat[!is.na(dat$year_sewa) & dat$year_sewa > 2013,]$sewa_post_c = 1
# Pre-study (control)
dat$sewa_pre_c = 0
dat[!is.na(dat$year_sewa) & dat$year_sewa <= 2013,]$sewa_pre_c = 1

## Road Upgrades
# Post-treatment
dat$road_post = 0
dat$road_post = apply(dat[, c("road_upgr1", "raod_upgr2", "road_upgr3")], 1, function(x) {sum(!is.na(x) & x > 2010)})
# Pre-treatment
dat$road_pre = 0
dat$road_pre = apply(dat[, c("road_upgr1", "raod_upgr2", "road_upgr3")], 1, function(x) {sum(!is.na(x) & x <= 2010)})

# Post-study (control)
dat$road_post_c = 0
dat$road_post_c = apply(dat[, c("road_upgr1", "raod_upgr2", "road_upgr3")], 1, function(x) {sum(!is.na(x) & x > 2013)})
# Pre-study (control)
dat$road_pre_c = 0
dat$road_pre_c = apply(dat[, c("road_upgr1", "raod_upgr2", "road_upgr3")], 1, function(x) {sum(!is.na(x) & x <= 2013)})

####################################################################################
## Balance in Control villages (treated control vs pure control)
data = dat
dat = dat[!is.na(dat$resp_name) & dat$treatment == 0,]
table(dat$treat)


####################################################################################
####################################################################################
## Table J.31

## Control group balance
# Years Access
model = formula("treat ~ grid_post_c + grid_pre_c + water_post_c + water_pre_c + sewa_post_c + sewa_pre_c + road_post_c + road_pre_c")
#model = formula("treat ~ grid_post_c + water_post_c + sewa_post_c + road_post_c")
balance_test = xBalance(model,
                        strata = factor(dat$district_name), #list(noblocks=NULL), cluster = factor(dat$village_id), 
                        data = dat, na.rm = T,
                        report=c("adj.means","adj.mean.diffs","z.scores","chisquare.test","p.values"))
rownames(balance_test$results) = c("Grid Access (Post)","Grid Access (Pre)","Piped Water (Post)", "Piped Water (Pre)","Sewage Access (Post)",
                                   "Sewage Access (Pre)", "Road Upgrade (Post)", "Road Upgrade (Pre)")
rownames(balance_test$overall) = c("Overall Test Statistic")

kable(round(balance_test$results,2),align = "c", booktabs = T, caption = "Village-Level Block-Adjusted Omnibus Balance Test (Original Study Control)", 
      col.names = c("Control Mean", "Treatment Mean", "Difference","Z-score","P-value"), format = "latex")
kable(round(balance_test$overall,2), align = "c", row.names = T,
      col.names = c("Chi-Sq", "Df", "P-value"), format = "latex")

####################################################################################
####################################################################################
## Table J.32

## Balance in Sample villages 

## Sample Balance
# Access Years
model = formula("treat ~ grid_post + grid_pre + water_post + water_pre + sewa_post + sewa_pre + road_post + road_pre")
#model = formula("treat ~ grid_post + water_post + sewa_post + road_post")
balance_test = xBalance(model,
                        strata = factor(data$district_name), #list(noblocks=NULL), cluster = factor(dat$village_id), 
                        data = data, na.rm = T,
                        report=c("adj.means","adj.mean.diffs","z.scores","chisquare.test","p.values"))

rownames(balance_test$results) = c("Grid Access (Post)","Grid Access (Pre)","Piped Water (Post)", "Piped Water (Pre)","Sewage Access (Post)",
                                   "Sewage Access (Pre)", "Road Upgrade (Post)", "Road Upgrade (Pre)")
rownames(balance_test$overall) = c("Overall Test Statistic")

kable(round(balance_test$results,2),align = "c", booktabs = T, caption = "Village-Level Block-Adjusted Omnibus Balance Test (Full Sample)", 
      col.names = c("Control Mean", "Treatment Mean", "Difference","Z-score","P-value"), format = "latex")
kable(round(balance_test$overall,2), align = "c", row.names = T,
      col.names = c("Chi-Sq", "Df", "P-value"), format = "latex")

####################################################################################
####################################################################################